/* This program first calculates the U-statistic, or AOC.  It then estimates the variance of the AOC using the DeLong (1988) methodology.  */

libname libone 'C:\Users\m40lxz1\OneDrive - Northern Illinois University\quality\sas\sasdata\';

/* The following steps read the annual rating and default data.  Change the input rating
variables below to compare a different pair of CRAs. */

/* Build the working rating table: keep only issuer-years rated by both agencies, */
/* keep the four columns used downstream, and give the two rating columns generic */
/* names (first_rating / second_rating) so the rest of the program is agency-     */
/* agnostic.  KEEP is applied before RENAME, so it lists the original names.      */
data rating;
 set libone.all_annual_rating;
 if fitch_rating = . or ejr_rating = . then delete;
 keep fitch_rating ejr_rating year ID;
 rename fitch_rating=first_rating
        ejr_rating=second_rating;
run;


/* Attach the default date (earliest) to every rating row.  A left join keeps    */
/* ratings with no match in the default list; their earliest is missing.         */
/* NOTE(review): assumes one row per id in defaultlist_2023 - duplicate ids      */
/* there would duplicate rating rows; confirm.                                   */
proc sql;
 create table list as
 select r.*, d.earliest
 from rating as r
 left join libone.defaultlist_2023 as d
 on r.id = d.id;
quit;

/* Restrict the sample to rating years 2014-2018 and flag each row as default    */
/* (default=1) or survivor (default=0).                                          */
/* Alternative window previously tried: year ge 2014 and year le 2020.           */
data list;
 set list;
 if year ge 2014 and year le 2018;

 /* Rows with no default date are survivors.  Testing for a missing date first   */
 /* avoids calling year() on a missing value (which writes "missing values were  */
 /* generated" notes to the log) and avoids relying on how missing values sort   */
 /* in comparisons.                                                              */
 if missing(earliest) then default=0;
 else do;
  _years_to_default=year(earliest)-year;
  if _years_to_default lt 0 then delete;          /* rating observed after the default year */
  else if _years_to_default le 2 then default=1;  /* default in the rating year or the 2 following calendar years */
  else default=0;                                 /* default more than 2 calendar years ahead */
 end;
 drop _years_to_default;

 /* Alternative definition previously tried (default in the rating year only):   */
 /*   if year(earliest)=year then default=1; else default=0;                     */
 /* earliest is intentionally kept in the output (a "drop earliest" was          */
 /* commented out in the earlier version).                                       */
run;

/* Diagnostic listing of the variables in the working table. */
proc contents data=list;
run;


/* Default sample: one row per defaulting id - the earliest rating year that is  */
/* flagged default=1.                                                            */
proc sort data=list(where=(default=1)) out=defaultlist;
 by id year;
run;

/* Keep the first row of each id.  BY-group processing (first.id) is used rather */
/* than comparing id with lag(id): lag() returns a missing value on its first    */
/* call, so the lag comparison would wrongly delete the first row whenever its   */
/* id is itself missing/blank.                                                   */
data defaultlist;
 set defaultlist;
 by id;
 if first.id;
run;

 /* Survivor sample: for each id, keep the third most recent default=0 row.      */
 /* Rows are ordered newest-first within id, numbered with yorder, and only      */
 /* yorder=3 is kept, so ids with fewer than three survivor rows drop out.       */
proc sort data=list(where=(default=0)) out=survivelist;
 by id descending year;
run;

data survivelist;
 set survivelist;
 by id;
 if first.id then yorder=0;   /* restart the counter for each id */
 yorder+1;                    /* sum statement: yorder is retained across rows */
 if yorder=3;
run;

/* The analysis sample is the default rows stacked on top of the survivor rows.  */
data list;
 set defaultlist survivelist;
run;

/*
data list;
 set list;
 *if default=1 and (year lt 2014 or year gt 2018) then delete;
*/


/* Count of default versus surviving rows in the analysis sample. */
proc freq data=list;
 tables default;
run;

/************************************************************************************************/
/* The next few steps create the s*d combinations, where s is the number of surviving bonds and  */
/* d is the number of default bonds.  That is, every surviving bond is matched with              */
/* every default bond.                                                                           */
/**************************************************************************************************/

/* Split the analysis sample into survivors and defaults in a single pass.       */
/* S_ID and D_ID are both the row number in list, so they are unique but not     */
/* consecutive within each output table.  Within the data set options KEEP= is   */
/* applied before RENAME=, so KEEP= lists the original rating names.             */
data survive(keep=first_rating second_rating S_ID
             rename=(first_rating=first_survive second_rating=second_survive))
     default(keep=first_rating second_rating D_ID
             rename=(first_rating=first_default second_rating=second_default));
 set list;
 S_ID=_N_;
 D_ID=_N_;
 if default=0 then output survive;
 else if default=1 then output default;
run;

  /* Cartesian match: every surviving bond is paired with every default bond.    */
proc sql;
 create table comb1 as
 select *
 from survive
 cross join default;
quit;

/* comb1 holds every survivor/default pair.  For each agency the kernel is 1     */
/* when the survivor's rating value is lower than the defaulter's (i.e. the      */
/* survivor is rated better), 0.5 when the two are equal, and 0 when it is       */
/* higher (rated worse).  A true comparison evaluates to 1 and a false one to 0, */
/* so the sum below yields exactly 1, 0.5 or 0.                                  */
data comb1;
 set comb1;
 first_kernel =(first_survive  lt first_default ) + 0.5*(first_survive  eq first_default );
 second_kernel=(second_survive lt second_default) + 0.5*(second_survive eq second_default);
run;

/* NOTE(review): with NOPRINT and no OUT= / OUTPUT request this PROC FREQ        */
/* appears to produce nothing; confirm whether it can be removed or whether      */
/* NOPRINT should be dropped to display the kernel distributions.                */
proc freq data=comb1 noprint;
 tables first_kernel second_kernel;
run;

/* The mean kernel over all survivor/default pairs is theta, the U-statistic     */
/* (AOC); see DeLong, page 839.  Only the two means are kept in theta.           */
proc means data=comb1 noprint;
 var first_kernel second_kernel;
 output out=theta(keep=first_theta second_theta)
        mean=first_theta second_theta;
run;


/********************************************************************************************************/
/* The next part calculates the variance of the AOC.                                                    */
/********************************************************************************************************/
/* Mean kernel per surviving bond.                                               */
/* V01_S holds, for every surviving bond, the mean kernel over all default       */
/* bonds.  It corresponds to DeLong's V01(Y), with Y being the surviving bonds.  */
/*                                                                               */
/* Comments in this section use the block form on purpose: a statement-style     */
/* comment (asterisk ... semicolon) must not contain an unmatched quotation      */
/* mark, and the apostrophe in "DeLong's" made SAS treat the following           */
/* statements as part of a quoted string.                                        */
proc sort data=comb1;
 by S_ID;
run;

proc means data=comb1 noprint;
 var first_kernel second_kernel;
 output out=V01_S mean=first_kernel_survive second_kernel_survive;
 by S_ID;
run;

data V01_S;
 set V01_S;
 rename _Freq_=num_default;    /* pairs per surviving bond = number of defaults; DeLong's m (number of X) */
 drop _Type_;
run;

/* Mean kernel per default bond.                                                 */
/* V10_D holds, for every default bond, the mean kernel over all surviving       */
/* bonds.  It corresponds to DeLong's V10(X), with X being the defaults.         */
/*                                                                               */
/* Comments in this section use the block form on purpose: a statement-style     */
/* comment (asterisk ... semicolon) must not contain an unmatched quotation      */
/* mark, and the apostrophe in "DeLong's" made SAS swallow the RENAME statement  */
/* below as part of a quoted string.                                             */
proc sort data=comb1;
 by D_ID;
run;

proc means data=comb1 noprint;
 var first_kernel second_kernel;
 output out=V10_D mean=first_kernel_default second_kernel_default;
 by D_ID;
run;

data V10_D;
 set V10_D;
 rename _Freq_=num_survive;    /* pairs per default bond = number of survivors; DeLong's n (number of Y) */
 drop _TYPE_;
run;

/* The next two steps attach theta (the AOC) to every row of V01_S and V10_D so the variance can be calculated.  */

  /* theta has a single row, so each cross join simply appends first_theta and   */
  /* second_theta to every row of the per-bond mean-kernel tables.               */
proc sql;
 create table V01_S_N as
 select *
 from V01_S
 cross join theta;

 create table V10_D_N as
 select *
 from V10_D
 cross join theta;
quit;

/* The next two steps calculate the terms of S01 and S10.  See the formula on page 840 of DeLong.  */

/* Per-survivor terms of S01: squared deviation of the mean kernel from theta    */
/* for each agency, plus the cross product of the two deviations.                */
data V01_S_N(drop=_dev_first _dev_second);
 set V01_S_N;
 _dev_first =first_kernel_survive -first_theta;
 _dev_second=second_kernel_survive-second_theta;
 S01_first =_dev_first**2;
 S01_second=_dev_second**2;
 S01_cross =_dev_first*_dev_second;
run;

/* Per-default terms of S10, built the same way from the default-side means.     */
data V10_D_N(drop=_dev_first _dev_second);
 set V10_D_N;
 _dev_first =first_kernel_default -first_theta;
 _dev_second=second_kernel_default-second_theta;
 S10_first =_dev_first**2;
 S10_second=_dev_second**2;
 S10_cross =_dev_first*_dev_second;
run;

/* Sum the S01 terms over all surviving bonds; _freq_ is the number of rows in   */
/* V01_S_N, i.e. the number of surviving bonds.                                  */
proc means data=V01_S_N noprint;
 var S01_first S01_second S01_cross;
 output out=S01_mean sum=sum_S01_first sum_S01_second sum_S01_cross;
run;

/* Note: the DeLong formula divides the sums by m-1 (n-1), not by m (n).         */
/* When the sums are divided by m (n) instead, the DeLong method generates       */
/* exactly the same variance as that of Bamber; the two differ slightly when     */
/* the sums are divided by m-1 (n-1).                                            */
/* Only the count (renamed num_survive) and the three S01 values are kept.       */
data S01_mean(drop=_type_ sum_S01_first sum_S01_second sum_S01_cross
              rename=(_freq_=num_survive));
 set S01_mean;
 S01_first =sum_S01_first /(_freq_-1);
 S01_second=sum_S01_second/(_freq_-1);
 S01_cross =sum_S01_cross /(_freq_-1);
run;

/* Sum the S10 terms over all default bonds; _freq_ is the number of rows in     */
/* V10_D_N, i.e. the number of default bonds.                                    */
proc means data=V10_D_N noprint;
 var S10_first S10_second S10_cross;
 output out=S10_mean sum=sum_S10_first sum_S10_second sum_S10_cross;
run;

/* Divide each sum by (count - 1) and keep only the count (renamed num_default)  */
/* and the three S10 values.                                                     */
data S10_mean(drop=_type_ sum_S10_first sum_S10_second sum_S10_cross
              rename=(_freq_=num_default));
 set S10_mean;
 S10_first =sum_S10_first /(_freq_-1);
 S10_second=sum_S10_second/(_freq_-1);
 S10_cross =sum_S10_cross /(_freq_-1);
run;

/* Variance and covariance of the two AOC estimates.  S10_mean and S01_mean each */
/* hold one row, so the merge without BY places them side by side.               */
/*   var = S10/num_default + S01/num_survive                                     */
data final;
 merge S10_mean S01_mean;
 var_first =S10_first /num_default + S01_first /num_survive;
 var_second=S10_second/num_default + S01_second/num_survive;
 cov_var   =S10_cross /num_default + S01_cross /num_survive;
 std_first =sqrt(var_first);
 std_second=sqrt(var_second);
run;

/* Test of equal AOCs and the Gini coefficients.                                 */
/* t_stat keeps its historical name but is the squared standardized difference,  */
/* (theta1-theta2)**2 / Var(theta1-theta2), which is referred to a chi-square    */
/* distribution with 1 degree of freedom; p_value is its upper-tail probability. */
/* If the estimated variance of the difference is not positive the statistic is  */
/* undefined, so t_stat and p_value are left missing instead of dividing by      */
/* zero.                                                                         */
data final;
 merge final theta;
 _var_diff=var_first+var_second-2*cov_var;
 if _var_diff gt 0 then t_stat=(first_theta-second_theta)**2/_var_diff;
 first_gini =(first_theta -0.5)*2;
 second_gini=(second_theta-0.5)*2;
 if not missing(t_stat) then p_value=sdf('CHISQ',t_stat,1);
 label t_stat ='Chi-square statistic (1 df) for first_theta = second_theta'
       p_value='Upper-tail chi-square (1 df) p-value of t_stat';
 drop _var_diff;
run;

proc print data=final;
 var first_theta second_theta t_stat first_gini second_gini p_value;
run;
